library(Biostrings)
library(tidyverse)
library(ggplot2)
library("ggbeeswarm") # avoid overlapping points 
# library(patchwork)
head( methods(class = "DNAStringSet") )
## [1] "!,List-method"           "!=,ANY,Vector-method"   
## [3] "!=,Vector,ANY-method"    "!=,Vector,Vector-method"
## [5] "[,List-method"           "[[,List-method"

methods

methods(generic = "reverseComplement")
##  [1] reverseComplement,DNAString-method                
##  [2] reverseComplement,DNAStringSet-method             
##  [3] reverseComplement,MaskedDNAString-method          
##  [4] reverseComplement,MaskedRNAString-method          
##  [5] reverseComplement,matrix-method                   
##  [6] reverseComplement,QualityScaledDNAStringSet-method
##  [7] reverseComplement,QualityScaledRNAStringSet-method
##  [8] reverseComplement,RNAString-method                
##  [9] reverseComplement,RNAStringSet-method             
## [10] reverseComplement,XStringViews-method             
## see '?methods' for accessing help and source code

GenomicRanges package

Examples: * Inference of gene locations * reads alignments to quantify gene expression * infer location of chipseq reads (location of the TF binding site to the closest gene)

## Loading required package: GenomeInfoDb

GRange

Exon

Grange: class. includes genomic locations & associations * seqnames: could be chromosomes, contigs, … * star (*) means unknown strand

# + = plus strand 
exon = GRanges(c('chr1:20-30:+', 'chr1:40-50:+', "chr1:45-55")) 
exon
## GRanges object with 3 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]     chr1     20-30      +
##   [2]     chr1     40-50      +
##   [3]     chr1     45-55      *
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths
exon = GRanges(c('chr1:20-30:+', 'chr1:40-50:+', "chr1:45-55:+")) 

exon is a vector (like DNAstring)

length(exon)
## [1] 3

start coordinate

start(exon)
## [1] 20 40 45
end(exon)
## [1] 30 50 55

Implementation of bioconductor assumes that intervals are closed intervals (in the operations) * e.g. 40 to 50: 50 - 40 + 1 = 11

width(exon)
## [1] 11 11 11

Collapse all exons regions

reduce(exon)
## GRanges object with 2 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]     chr1     20-30      +
##   [2]     chr1     40-55      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths

Disjoint range

disjoin(exon)
## GRanges object with 4 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]     chr1     20-30      +
##   [2]     chr1     40-44      +
##   [3]     chr1     45-50      +
##   [4]     chr1     51-55      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths
promoters(exon, upstream=2000, downstream=3000)
## GRanges object with 3 ranges and 0 metadata columns:
##       seqnames     ranges strand
##          <Rle>  <IRanges>  <Rle>
##   [1]     chr1 -1980-3019      +
##   [2]     chr1 -1960-3039      +
##   [3]     chr1 -1955-3044      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths

Project reads onto the genome * Interpretation: 19 zeros, 11 ones, 9 zeros, 5 ones, 6 twos, 5 ones * (see representation in next chunk for mvisual representation)

coverage(exon)
## RleList of length 1
## $chr1
## integer-Rle of length 55 with 6 runs
##   Lengths: 19 11  9  5  6  5
##   Values :  0  1  0  1  2  1
coverage(exon) %>% unlist() %>% as.integer()
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0
## [36] 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1

Coerce it as granges

cvg <- coverage(exon)
cvg <- cvg %>% as('GRanges')
cvg
## GRanges object with 6 ranges and 1 metadata column:
##       seqnames    ranges strand |     score
##          <Rle> <IRanges>  <Rle> | <integer>
##   [1]     chr1      1-19      * |         0
##   [2]     chr1     20-30      * |         1
##   [3]     chr1     31-39      * |         0
##   [4]     chr1     40-44      * |         1
##   [5]     chr1     45-50      * |         2
##   [6]     chr1     51-55      * |         1
##   -------
##   seqinfo: 1 sequence from an unspecified genome
granges(cvg)
## GRanges object with 6 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]     chr1      1-19      *
##   [2]     chr1     20-30      *
##   [3]     chr1     31-39      *
##   [4]     chr1     40-44      *
##   [5]     chr1     45-50      *
##   [6]     chr1     51-55      *
##   -------
##   seqinfo: 1 sequence from an unspecified genome

You can add elements to this dataframe

cvg$score <- c(1,0,0,4,3,5)
cvg
## GRanges object with 6 ranges and 1 metadata column:
##       seqnames    ranges strand |     score
##          <Rle> <IRanges>  <Rle> | <numeric>
##   [1]     chr1      1-19      * |         1
##   [2]     chr1     20-30      * |         0
##   [3]     chr1     31-39      * |         0
##   [4]     chr1     40-44      * |         4
##   [5]     chr1     45-50      * |         3
##   [6]     chr1     51-55      * |         5
##   -------
##   seqinfo: 1 sequence from an unspecified genome
mcols(cvg)
## DataFrame with 6 rows and 1 column
##       score
##   <numeric>
## 1         1
## 2         0
## 3         0
## 4         4
## 5         3
## 6         5
df <- DataFrame(
  i   = 1:3,
  dna = DNAStringSet(c('AAA','CCC','GGG')),
  gr  = exon
)
df
## DataFrame with 3 rows and 3 columns
##           i            dna           gr
##   <integer> <DNAStringSet>    <GRanges>
## 1         1            AAA chr1:20-30:+
## 2         2            CCC chr1:40-50:+
## 3         3            GGG chr1:45-55:+

SNP

snp <- GRanges(c('chr1:23435','chr2:54323'))

Shift SNP locations (e.g. if another reference was used):

shift(snp, 1)
## GRanges object with 2 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]     chr1     23436      *
##   [2]     chr2     54324      *
##   -------
##   seqinfo: 2 sequences from an unspecified genome; no seqlengths

Extend SNP ranges:

flank(snp, 10)
## GRanges object with 2 ranges and 0 metadata columns:
##       seqnames      ranges strand
##          <Rle>   <IRanges>  <Rle>
##   [1]     chr1 23425-23434      *
##   [2]     chr2 54313-54322      *
##   -------
##   seqinfo: 2 sequences from an unspecified genome; no seqlengths

How many of the exons contain SNPs inside, what is the overlap?

snp <- GRanges(c('chr1:15:+','chr1:25:+','chr1:35:+'))
countOverlaps(snp, exon)
## [1] 0 1 0

Viceversa:

countOverlaps(exon, snp)
## [1] 1 0 0
findOverlaps(snp, exon)
## Hits object with 1 hit and 0 metadata columns:
##       queryHits subjectHits
##       <integer>   <integer>
##   [1]         2           1
##   -------
##   queryLength: 3 / subjectLength: 3
snp[snp %over% exon]
## GRanges object with 1 range and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]     chr1        25      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] GenomicRanges_1.36.0 GenomeInfoDb_1.20.0  ggbeeswarm_0.6.0    
##  [4] forcats_0.4.0        stringr_1.4.0        dplyr_0.8.2         
##  [7] purrr_0.3.2          readr_1.3.1          tidyr_0.8.3         
## [10] tibble_2.1.3         ggplot2_3.2.0        tidyverse_1.2.1     
## [13] Biostrings_2.52.0    XVector_0.24.0       IRanges_2.18.1      
## [16] S4Vectors_0.22.0     BiocGenerics_0.30.0 
## 
## loaded via a namespace (and not attached):
##  [1] beeswarm_0.2.3         tidyselect_0.2.5       xfun_0.8              
##  [4] haven_2.1.0            lattice_0.20-38        colorspace_1.4-1      
##  [7] generics_0.0.2         htmltools_0.3.6        yaml_2.2.0            
## [10] rlang_0.4.0            pillar_1.4.2           glue_1.3.1            
## [13] withr_2.1.2            modelr_0.1.4           readxl_1.3.1          
## [16] GenomeInfoDbData_1.2.1 zlibbioc_1.30.0        munsell_0.5.0         
## [19] gtable_0.3.0           cellranger_1.1.0       rvest_0.3.4           
## [22] evaluate_0.14          knitr_1.23             vipor_0.4.5           
## [25] broom_0.5.2            Rcpp_1.0.1             scales_1.0.0          
## [28] backports_1.1.4        jsonlite_1.6           hms_0.4.2             
## [31] digest_0.6.19          stringi_1.4.3          grid_3.6.0            
## [34] bitops_1.0-6           cli_1.1.0              tools_3.6.0           
## [37] magrittr_1.5           RCurl_1.95-4.12        lazyeval_0.2.2        
## [40] crayon_1.3.4           pkgconfig_2.0.2        xml2_1.2.0            
## [43] lubridate_1.7.4        assertthat_0.2.1       rmarkdown_1.13        
## [46] httr_1.4.0             rstudioapi_0.10        R6_2.4.0              
## [49] nlme_3.1-140           compiler_3.6.0

Visualizations

Base plots

head(DNase)
## Grouped Data: density ~ conc | Run
##   Run       conc density
## 1   1 0.04882812   0.017
## 2   1 0.04882812   0.018
## 3   1 0.19531250   0.121
## 4   1 0.19531250   0.124
## 5   1 0.39062500   0.206
## 6   1 0.39062500   0.215
{
  plot(DNase$conc, DNase$density,
     xlab="conc", ylab="density",
     col=DNase$Run, cex=3)
  abline(v=DNase$conc, lwd=0.05, lty="dotted")
}

hist(DNase$conc)

boxplot(density ~ Run, data=DNase)

GGplot

ggplot(data=DNase, aes(x=Run, y=density, color=Run)) + geom_boxplot()

ggplot(data=DNase, aes(x=conc, y=density, color=Run)) + geom_point(shape=1, size=2) + 
  geom_smooth(method="loess")

Hiiragi2013 dataset

load("hiiragi2013.rda")
library(Biobase)
hiiragi2013
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 45101 features, 101 samples 
##   element names: exprs 
## protocolData
##   sampleNames: 1 E3.25 2 E3.25 ... 101 E4.5 (FGF4-KO) (101 total)
##   varLabels: ScanDate
##   varMetadata: labelDescription
## phenoData
##   sampleNames: 1 E3.25 2 E3.25 ... 101 E4.5 (FGF4-KO) (101 total)
##   varLabels: File.name Embryonic.day ... sampleColour (8 total)
##   varMetadata: labelDescription
## featureData
##   featureNames: 1415670_at 1415671_at ... AFFX-TrpnX-M_at (45101
##     total)
##   fvarLabels: symbol genename ensembl
##   fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation: mouse4302
dim(hiiragi2013)
## Features  Samples 
##    45101      101

Extract quantitative values

head(exprs(hiiragi2013))
##                1 E3.25   2 E3.25  3 E3.25   4 E3.25   5 E3.25   6 E3.25
## 1415670_at    4.910459  7.526672 6.956328  6.424048  5.105808  5.856685
## 1415671_at    9.768979  9.144228 9.295010 11.059831  9.376749  9.681017
## 1415672_at   10.411893 10.918942 9.495738 10.317996 11.143684 10.234943
## 1415673_at    5.618108  6.439416 6.730465  4.914527  5.619778  7.188673
## 1415674_a_at  7.541891  8.380285 8.480580  7.977363  8.650312  8.639637
## 1415675_at    8.590070  7.661697 8.741957  9.147643  8.868919  8.595630
##                7 E3.25  8 E3.25   9 E3.25 10 E3.25  11 E3.25  12 E3.25
## 1415670_at    5.059961 4.574661  8.123073 5.464257  6.378490  5.980889
## 1415671_at    7.665886 9.325743  9.724729 9.389818 10.741852 10.076715
## 1415672_at   10.642970 9.304958 11.037632 9.754123 10.574534  9.557437
## 1415673_at    6.395441 6.405085  6.542729 6.842668  4.638282  7.233038
## 1415674_a_at  7.645431 5.265520  6.748849 6.951920  8.436673  8.326323
## 1415675_at    8.266214 8.359522  8.896249 9.503661  8.972576  7.525839
##               13 E3.25 14 E3.25 15 E3.25  16 E3.25 17 E3.25  18 E3.25
## 1415670_at    8.535549 8.084051 8.466555  7.721368 8.043886  7.936210
## 1415671_at   10.457479 9.152969 9.846789  9.662910 9.719014 11.228105
## 1415672_at    9.903827 9.857872 9.448139 10.304187 9.791350 11.376457
## 1415673_at    6.570877 7.860524 6.508946  5.871037 3.420043  6.258020
## 1415674_a_at  6.637048 6.486028 5.965517  7.148297 8.682546  9.539372
## 1415675_at    6.908303 8.377355 8.857803  8.232332 8.736221  8.259772
##               19 E3.25  20 E3.25  21 E3.25  22 E3.25  23 E3.25  24 E3.25
## 1415670_at    8.372314  8.790075  7.699547  9.160169  9.078835  7.026946
## 1415671_at   11.097919 11.161601 11.484050 11.868276 11.073027 11.077749
## 1415672_at   11.325410 11.202199 11.079766 10.111392 10.758893  9.982358
## 1415673_at    7.040959  7.061794  7.767596  7.186408  5.480284  3.787961
## 1415674_a_at  8.242590  9.988684  9.263087 10.173450  8.572024  9.577342
## 1415675_at    8.078979  9.553001  9.608263  8.958113  9.229941  8.644070
##               25 E3.25  26 E3.25  27 E3.25  28 E3.25  29 E3.25  30 E3.25
## 1415670_at    9.481374  5.042345 10.060482  8.720720  7.640830  8.198603
## 1415671_at   11.648165 10.192303 11.588828 10.410176 10.848364 11.786923
## 1415672_at   10.845297 11.528406 10.822370 11.443782 10.777431 10.848957
## 1415673_at    6.547257  8.512493  7.591152  4.543854  6.596599  8.371666
## 1415674_a_at  9.494986  8.829271  9.548763  8.830112  7.970468  9.456158
## 1415675_at    8.982421  8.796370  7.805410  8.821485  8.694920 10.280600
##               31 E3.25  32 E3.25  33 E3.25  34 E3.25  35 E3.25  36 E3.25
## 1415670_at    8.706866  9.447069  8.296739  6.834025  8.541503  6.077360
## 1415671_at   10.700026 10.540521 11.572670 10.797511 11.134510 11.248140
## 1415672_at   11.252663 10.440958 11.210719 11.234910 10.464442 10.749835
## 1415673_at    8.056224  7.939600  6.608207  4.958611  4.800360  7.310682
## 1415674_a_at  8.359363  9.329402 10.167002  8.767243  8.556250  7.314833
## 1415675_at    9.559761  9.280311  8.273592  8.834519  7.971188  8.666798
##              37 E3.5 (PE) 38 E3.5 (PE) 39 E3.5 (EPI) 40 E3.5 (EPI)
## 1415670_at       8.273882     7.620525      7.866760      8.415855
## 1415671_at       9.344318    11.214708     10.018825     10.772849
## 1415672_at       8.263001    10.262658     10.455752     10.575892
## 1415673_at       4.733935     7.198648      7.192898      5.688193
## 1415674_a_at     7.658033     5.559332      8.075276      8.645646
## 1415675_at       8.864403     8.463336      8.232021      8.945480
##              41 E3.5 (PE) 42 E3.5 (EPI) 43 E3.5 (PE) 44 E3.5 (EPI)
## 1415670_at      10.069573      5.186645     8.277925      8.907035
## 1415671_at      10.995412     10.281639    10.322795     11.227185
## 1415672_at      10.434905     11.055458    10.326748     10.353424
## 1415673_at       7.873462      7.056827     7.628060      7.880329
## 1415674_a_at     8.924068      5.786938     8.090168      7.259978
## 1415675_at       9.264361      9.268250     5.406516      8.734146
##              45 E3.5 (EPI) 46 E3.5 (EPI) 47 E3.5 (EPI) 48 E3.5 (EPI)
## 1415670_at        8.037981      9.564671      8.413460      6.067820
## 1415671_at       10.039085      9.807397     10.763823      9.366529
## 1415672_at       10.670212      8.679185     11.023366      8.100330
## 1415673_at        7.678721      7.319281      6.180487      5.994487
## 1415674_a_at      4.957499      9.630059      6.087135      8.319604
## 1415675_at        9.492942      7.148993      8.243656      8.465048
##              49 E3.5 (PE) 50 E3.5 (PE) 51 E3.5 (PE) 52 E3.5 (PE)
## 1415670_at       7.283398     8.162521     8.915069     8.003773
## 1415671_at      10.404443    10.821639    10.302784     9.375657
## 1415672_at       9.640297    10.852335    10.524796     9.453517
## 1415673_at       9.593375     8.803020     7.272420     6.506133
## 1415674_a_at     9.804526     8.337384     8.640091     9.231093
## 1415675_at       8.572719     9.777869     8.585933     8.941711
##              53 E3.5 (PE) 54 E3.5 (PE) 55 E3.5 (EPI) 56 E3.5 (EPI)
## 1415670_at       8.504471     9.230439      7.280949      7.546062
## 1415671_at       8.878585     9.419837     10.289884      8.132641
## 1415672_at      10.326346     9.670100     10.294861      9.830191
## 1415673_at       7.222724     7.341542      5.511479      5.819504
## 1415674_a_at     9.021208     8.154534      8.516675      7.097983
## 1415675_at      10.095101     9.219913      8.635134      9.563152
##              57 E3.5 (EPI) 58 E3.5 (PE) 59 E4.5 (PE) 60 E4.5 (EPI)
## 1415670_at        7.609851     7.600535     9.503289      8.657604
## 1415671_at        9.193082    10.539076    11.458339      8.824234
## 1415672_at       11.286201    10.426612     6.699087      9.496283
## 1415673_at        8.123130     6.496563     6.765473      8.776755
## 1415674_a_at      8.324338     6.429535     9.974538      9.098873
## 1415675_at        8.293887     9.680122     9.294144      9.221649
##              61 E4.5 (PE) 62 E4.5 (EPI) 63 E4.5 (EPI) 64 E4.5 (EPI)
## 1415670_at       9.373941      7.420272      8.363999     10.516034
## 1415671_at      11.336816      9.386655      8.589102      9.612615
## 1415672_at      10.131304      8.756391      9.728199      8.839825
## 1415673_at       9.114517      6.190662      5.844180      6.814642
## 1415674_a_at     8.894763      9.312049      8.043095      8.597153
## 1415675_at       9.983117      9.027379      7.846470      7.746031
##              65 E4.5 (PE) 66 E4.5 (PE) 67 E3.25 (FGF4-KO)
## 1415670_at      10.233514     9.086783           6.985313
## 1415671_at      10.518523    10.833673          10.615071
## 1415672_at      10.099166     9.295808           9.754342
## 1415673_at       9.513386     9.252811           5.287788
## 1415674_a_at     6.982438     8.848143           9.092702
## 1415675_at       9.453644     9.127367           8.901534
##              68 E3.25 (FGF4-KO) 69 E3.25 (FGF4-KO) 70 E3.25 (FGF4-KO)
## 1415670_at             5.814319           7.690732           8.075658
## 1415671_at            10.379045          10.567961          10.715209
## 1415672_at             7.039647           8.955027           9.063367
## 1415673_at             6.039571           5.684059           5.794135
## 1415674_a_at           7.039116           8.084398           7.926238
## 1415675_at             7.648410           7.952806           8.899669
##              71 E3.25 (FGF4-KO) 72 E3.25 (FGF4-KO) 73 E3.25 (FGF4-KO)
## 1415670_at             8.267528           5.180107           7.199486
## 1415671_at            10.131859          10.250684          10.576188
## 1415672_at            10.177928           9.551955           8.138070
## 1415673_at             7.735918           6.620887           6.392453
## 1415674_a_at           9.796577           9.031277           8.916845
## 1415675_at             8.878228           9.071759           7.943296
##              74 E3.25 (FGF4-KO) 75 E3.25 (FGF4-KO) 76 E3.25 (FGF4-KO)
## 1415670_at             6.384739           8.767213           5.770023
## 1415671_at            10.094252          11.502889          10.000846
## 1415672_at             7.661539           9.856569           3.375618
## 1415673_at             5.538817           6.186746           4.806714
## 1415674_a_at           8.861186           9.431775           9.454379
## 1415675_at             7.319638           9.263623           8.817041
##              77 E3.25 (FGF4-KO) 78 E3.25 (FGF4-KO) 79 E3.25 (FGF4-KO)
## 1415670_at             6.675188           9.397408          10.440421
## 1415671_at            11.100101          10.160398          11.297621
## 1415672_at             8.726354          10.674683          10.649906
## 1415673_at             4.490444           8.232427           6.235927
## 1415674_a_at           8.552819           7.264563           9.667478
## 1415675_at             8.675685           9.566866           8.457816
##              80 E3.25 (FGF4-KO) 81 E3.25 (FGF4-KO) 82 E3.25 (FGF4-KO)
## 1415670_at             7.506113           9.537808           9.626242
## 1415671_at            11.336896          11.714058          11.966809
## 1415672_at            10.283193          10.965307           6.882138
## 1415673_at            10.018832           9.438075           5.373948
## 1415674_a_at           9.453293           9.279124           8.096551
## 1415675_at             9.221731           8.961749           8.976936
##              83 E3.25 (FGF4-KO) 84 E3.5 (FGF4-KO) 85 E3.5 (FGF4-KO)
## 1415670_at             9.082400          8.410321          8.182409
## 1415671_at            11.750857         11.390676         11.009250
## 1415672_at             9.453529         11.097374         11.176322
## 1415673_at             8.095055          5.609975          6.732978
## 1415674_a_at           9.287606         10.106055          8.930789
## 1415675_at             8.318733          9.556829          8.767664
##              86 E3.5 (FGF4-KO) 87 E3.5 (FGF4-KO) 88 E3.5 (FGF4-KO)
## 1415670_at            8.084051          7.178949          8.394489
## 1415671_at           11.369503         11.531595         11.308553
## 1415672_at           10.426476          9.838740         11.079161
## 1415673_at            8.229005          7.149751          7.641474
## 1415674_a_at          7.324169          8.711869          9.361509
## 1415675_at            8.708365          8.887450          9.011559
##              89 E3.5 (FGF4-KO) 90 E3.5 (FGF4-KO) 91 E3.5 (FGF4-KO)
## 1415670_at            8.410588          9.306985          8.157402
## 1415671_at           11.267228         11.779960         11.623497
## 1415672_at            9.704876         10.726146         10.753831
## 1415673_at            6.607004          7.004673          8.000327
## 1415674_a_at          9.404744          7.730486          9.044348
## 1415675_at            8.845187          7.243588          9.564489
##              92 E4.5 (FGF4-KO) 93 E4.5 (FGF4-KO) 94 E4.5 (FGF4-KO)
## 1415670_at            8.359740          9.575871          6.179409
## 1415671_at           11.175453          9.591774          9.518947
## 1415672_at           10.885286          9.339838          9.370398
## 1415673_at            4.957310          5.652768          6.910332
## 1415674_a_at          8.592410          5.954694          8.159053
## 1415675_at            6.257513          8.496400          7.931365
##              95 E4.5 (FGF4-KO) 96 E4.5 (FGF4-KO) 97 E4.5 (FGF4-KO)
## 1415670_at            8.601108          8.051324          4.911292
## 1415671_at           10.526221         10.770072          6.375060
## 1415672_at            7.787682         10.084626          8.773603
## 1415673_at            6.694354          6.104253          5.313348
## 1415674_a_at          9.041147          8.667740          8.028946
## 1415675_at            6.235425          9.763872          9.293012
##              98 E4.5 (FGF4-KO) 99 E4.5 (FGF4-KO) 100 E4.5 (FGF4-KO)
## 1415670_at            3.801598          8.360182           7.999680
## 1415671_at           11.130234          8.052922           7.425286
## 1415672_at           10.768072         10.327700           7.729140
## 1415673_at            4.872900          8.932054           5.028091
## 1415674_a_at          5.790260          9.019782           5.045660
## 1415675_at            8.688155          7.368104           7.122485
##              101 E4.5 (FGF4-KO)
## 1415670_at             8.193070
## 1415671_at             9.148713
## 1415672_at            10.283193
## 1415673_at             4.505772
## 1415674_a_at           8.652528
## 1415675_at             8.672924

Extract sample annotations

# View(pData(hiiragi2013))
dftx = data.frame(t(exprs(hiiragi2013)), pData(hiiragi2013))
ggplot(dftx, aes(x=X1426642_at, y = X1418765_at)) + 
  geom_point(aes(colour=sampleColour)) + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

selectedProbes <- c(Fgf4 = "1420085_at", Gata4 = "1418863_at",
                    Gata6 = "1425463_at",  Sox2 = "1416967_at")
library(dplyr)
library(tidyverse)
tmp <- data.frame(t(exprs(hiiragi2013[selectedProbes, ])))
names(tmp) <- names(selectedProbes)
tmp$sample <- rownames(tmp)

Wide to long format

genes <- gather(tmp, "gene","expression", -sample)
head(genes)
##    sample gene expression
## 1 1 E3.25 Fgf4   3.027715
## 2 2 E3.25 Fgf4   9.293016
## 3 3 E3.25 Fgf4   2.940142
## 4 4 E3.25 Fgf4   9.715243
## 5 5 E3.25 Fgf4   8.924228
## 6 6 E3.25 Fgf4  11.325952
genes %>%
    filter(gene == "Gata4") %>%
    ggplot(aes(x = expression)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p <- ggplot(genes, aes(x = gene, y = expression, fill = gene))
bxplot <- p + geom_boxplot()
bxplot

p <- ggplot(genes, aes(x = gene, y = expression, fill = gene))
bxplot <- p + geom_violin()
bxplot

Beeswarm plots

jtrplot <- p +
    geom_jitter(aes(colour = gene)) +
    theme(legend.position = "none")
jtrplot

dotplot <- p + geom_dotplot(binaxis = "y", binwidth = 1/6,
                            stackdir = "center", stackratio = 0.75,
                            aes(color = gene)) +
    theme(legend.position = "none")
dotplot

Beeplot: to avoid overlapping points

beeplot <- p + geom_beeswarm(aes(color = gene)) + 
    theme(legend.position = "none")
beeplot

# jtrplot + dotplot + beeplot
dfx <- as.data.frame(Biobase::exprs(hiiragi2013))

scp <- ggplot(dfx, aes(x=`59 E4.5 (PE)`, y=`92 E4.5 (FGF4-KO)`)) + geom_point()
scp

scp <- ggplot(dfx, aes(x=`59 E4.5 (PE)`, y=`92 E4.5 (FGF4-KO)`)) + geom_point(alpha=0.1)
scp

scp <- ggplot(dfx, aes(x=`59 E4.5 (PE)`, y=`92 E4.5 (FGF4-KO)`)) + geom_density2d(h=0.5, bins=60)
scp

scp <- ggplot(dfx, aes(x=`59 E4.5 (PE)`, y=`92 E4.5 (FGF4-KO)`)) + geom_density2d()
scp

scp <- ggplot(dfx, aes(x=`59 E4.5 (PE)`, y=`92 E4.5 (FGF4-KO)`)) + geom_hex()
scp

Facet grid

Plotly

Useful to check annotations of data points

library("plotly")
scp <- ggplot(dfx[1:100, ],
              aes(x= `59 E4.5 (PE)`, y = `92 E4.5 (FGF4-KO)`))

scp2 <- scp + geom_point()
ggplotly(scp2)

Session 3 - Summarized Experiment

Summarized experiment levels

library(SummarizedExperiment)
library(airway)
set.seed(123)

data(airway)
airway
## class: RangedSummarizedExperiment 
## dim: 64102 8 
## metadata(1): ''
## assays(1): counts
## rownames(64102): ENSG00000000003 ENSG00000000005 ... LRG_98 LRG_99
## rowData names(0):
## colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
## colData names(9): SampleName cell ... Sample BioSample

Rows: Ensemble genes columns: samples

dim(airway)
## [1] 64102     8
dim(assay(airway))
## [1] 64102     8
nrow(colData(airway))
## [1] 8
dplyr::glimpse( dimnames(airway) )
## List of 2
##  $ : chr [1:64102] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419" "ENSG00000000457" ...
##  $ : chr [1:8] "SRR1039508" "SRR1039509" "SRR1039512" "SRR1039513" ...
colData(airway)
## DataFrame with 8 rows and 9 columns
##            SampleName     cell      dex    albut        Run avgLength
##              <factor> <factor> <factor> <factor>   <factor> <integer>
## SRR1039508 GSM1275862   N61311    untrt    untrt SRR1039508       126
## SRR1039509 GSM1275863   N61311      trt    untrt SRR1039509       126
## SRR1039512 GSM1275866  N052611    untrt    untrt SRR1039512       126
## SRR1039513 GSM1275867  N052611      trt    untrt SRR1039513        87
## SRR1039516 GSM1275870  N080611    untrt    untrt SRR1039516       120
## SRR1039517 GSM1275871  N080611      trt    untrt SRR1039517       126
## SRR1039520 GSM1275874  N061011    untrt    untrt SRR1039520       101
## SRR1039521 GSM1275875  N061011      trt    untrt SRR1039521        98
##            Experiment    Sample    BioSample
##              <factor>  <factor>     <factor>
## SRR1039508  SRX384345 SRS508568 SAMN02422669
## SRR1039509  SRX384346 SRS508567 SAMN02422675
## SRR1039512  SRX384349 SRS508571 SAMN02422678
## SRR1039513  SRX384350 SRS508572 SAMN02422670
## SRR1039516  SRX384353 SRS508575 SAMN02422682
## SRR1039517  SRX384354 SRS508576 SAMN02422673
## SRR1039520  SRX384357 SRS508579 SAMN02422683
## SRR1039521  SRX384358 SRS508580 SAMN02422677

No rowdata:

rowData(airway)
## DataFrame with 64102 rows and 0 columns

rowRanges: * 1 entry per row * contains info about the exons

rowRanges(airway)
## GRangesList object of length 64102:
## $ENSG00000000003 
## GRanges object with 17 ranges and 2 metadata columns:
##        seqnames            ranges strand |   exon_id       exon_name
##           <Rle>         <IRanges>  <Rle> | <integer>     <character>
##    [1]        X 99883667-99884983      - |    667145 ENSE00001459322
##    [2]        X 99885756-99885863      - |    667146 ENSE00000868868
##    [3]        X 99887482-99887565      - |    667147 ENSE00000401072
##    [4]        X 99887538-99887565      - |    667148 ENSE00001849132
##    [5]        X 99888402-99888536      - |    667149 ENSE00003554016
##    ...      ...               ...    ... .       ...             ...
##   [13]        X 99890555-99890743      - |    667156 ENSE00003512331
##   [14]        X 99891188-99891686      - |    667158 ENSE00001886883
##   [15]        X 99891605-99891803      - |    667159 ENSE00001855382
##   [16]        X 99891790-99892101      - |    667160 ENSE00001863395
##   [17]        X 99894942-99894988      - |    667161 ENSE00001828996
## 
## ...
## <64101 more elements>
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome

Metadata function

metadata(airway)
## [[1]]
## Experiment data
##   Experimenter name: Himes BE 
##   Laboratory: NA 
##   Contact information:  
##   Title: RNA-Seq transcriptome profiling identifies CRISPLD2 as a glucocorticoid responsive gene that modulates cytokine function in airway smooth muscle cells. 
##   URL: http://www.ncbi.nlm.nih.gov/pubmed/24926665 
##   PMIDs: 24926665 
## 
##   Abstract: A 226 word abstract is available. Use 'abstract' method.
message(class(assay(airway)), dim(assay(airway)))
## matrix641028
head(assay(airway))
##                 SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## ENSG00000000003        679        448        873        408       1138
## ENSG00000000005          0          0          0          0          0
## ENSG00000000419        467        515        621        365        587
## ENSG00000000457        260        211        263        164        245
## ENSG00000000460         60         55         40         35         78
## ENSG00000000938          0          0          2          0          1
##                 SRR1039517 SRR1039520 SRR1039521
## ENSG00000000003       1047        770        572
## ENSG00000000005          0          0          0
## ENSG00000000419        799        417        508
## ENSG00000000457        331        233        229
## ENSG00000000460         63         76         60
## ENSG00000000938          0          0          0

Matrix behavior of summarized type

matrix: all elements must be of the same type

m <- matrix(
    rnorm(12), nrow = 4, ncol = 3,
    dimnames = list(letters[1:4], LETTERS[1:3])
)
m
##             A          B          C
## a -0.56047565  0.1292877 -0.6868529
## b -0.23017749  1.7150650 -0.4456620
## c  1.55870831  0.4609162  1.2240818
## d  0.07050839 -1.2650612  0.3598138
dim(m)
## [1] 4 3
dimnames(m)
## [[1]]
## [1] "a" "b" "c" "d"
## 
## [[2]]
## [1] "A" "B" "C"
m[,1,drop=FALSE]
##             A
## a -0.56047565
## b -0.23017749
## c  1.55870831
## d  0.07050839

SummarizedExperiment has all the matrix-like interface functions incorporated

subset <- airway[1:5,1:4]
assay(subset)
##                 SRR1039508 SRR1039509 SRR1039512 SRR1039513
## ENSG00000000003        679        448        873        408
## ENSG00000000005          0          0          0          0
## ENSG00000000419        467        515        621        365
## ENSG00000000457        260        211        263        164
## ENSG00000000460         60         55         40         35
colData(subset)
## DataFrame with 4 rows and 9 columns
##            SampleName     cell      dex    albut        Run avgLength
##              <factor> <factor> <factor> <factor>   <factor> <integer>
## SRR1039508 GSM1275862   N61311    untrt    untrt SRR1039508       126
## SRR1039509 GSM1275863   N61311      trt    untrt SRR1039509       126
## SRR1039512 GSM1275866  N052611    untrt    untrt SRR1039512       126
## SRR1039513 GSM1275867  N052611      trt    untrt SRR1039513        87
##            Experiment    Sample    BioSample
##              <factor>  <factor>     <factor>
## SRR1039508  SRX384345 SRS508568 SAMN02422669
## SRR1039509  SRX384346 SRS508567 SAMN02422675
## SRR1039512  SRX384349 SRS508571 SAMN02422678
## SRR1039513  SRX384350 SRS508572 SAMN02422670
rowRanges(subset)
## GRangesList object of length 5:
## $ENSG00000000003 
## GRanges object with 17 ranges and 2 metadata columns:
##        seqnames            ranges strand |   exon_id       exon_name
##           <Rle>         <IRanges>  <Rle> | <integer>     <character>
##    [1]        X 99883667-99884983      - |    667145 ENSE00001459322
##    [2]        X 99885756-99885863      - |    667146 ENSE00000868868
##    [3]        X 99887482-99887565      - |    667147 ENSE00000401072
##    [4]        X 99887538-99887565      - |    667148 ENSE00001849132
##    [5]        X 99888402-99888536      - |    667149 ENSE00003554016
##    ...      ...               ...    ... .       ...             ...
##   [13]        X 99890555-99890743      - |    667156 ENSE00003512331
##   [14]        X 99891188-99891686      - |    667158 ENSE00001886883
##   [15]        X 99891605-99891803      - |    667159 ENSE00001855382
##   [16]        X 99891790-99892101      - |    667160 ENSE00001863395
##   [17]        X 99894942-99894988      - |    667161 ENSE00001828996
## 
## ...
## <4 more elements>
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome

Matrix operations

rowSums(m)
##         a         b         c         d 
## -1.118041  1.039226  3.243706 -0.834739
colSums(m)
##         A         B         C 
## 0.8385636 1.0402077 0.4513808
Exercises

Overall sample counts quantification * Representation of technical variation between samples

colSums(assay(airway))
## SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516 SRR1039517 
##   20637971   18809481   25348649   15163415   24448408   30818215 
## SRR1039520 SRR1039521 
##   19126151   21164133
  • Average number of reads per gene (not necessarily gene expression, because read counts depend on gene length, etc.)
genemeans <- rowMeans(log(assay(airway) + 1, base = exp(1)))
ggplot(data=data.frame(genemeans),
       aes(x=genemeans)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Subsetting SummarizedExperiments

colData(airway)
## DataFrame with 8 rows and 9 columns
##            SampleName     cell      dex    albut        Run avgLength
##              <factor> <factor> <factor> <factor>   <factor> <integer>
## SRR1039508 GSM1275862   N61311    untrt    untrt SRR1039508       126
## SRR1039509 GSM1275863   N61311      trt    untrt SRR1039509       126
## SRR1039512 GSM1275866  N052611    untrt    untrt SRR1039512       126
## SRR1039513 GSM1275867  N052611      trt    untrt SRR1039513        87
## SRR1039516 GSM1275870  N080611    untrt    untrt SRR1039516       120
## SRR1039517 GSM1275871  N080611      trt    untrt SRR1039517       126
## SRR1039520 GSM1275874  N061011    untrt    untrt SRR1039520       101
## SRR1039521 GSM1275875  N061011      trt    untrt SRR1039521        98
##            Experiment    Sample    BioSample
##              <factor>  <factor>     <factor>
## SRR1039508  SRX384345 SRS508568 SAMN02422669
## SRR1039509  SRX384346 SRS508567 SAMN02422675
## SRR1039512  SRX384349 SRS508571 SAMN02422678
## SRR1039513  SRX384350 SRS508572 SAMN02422670
## SRR1039516  SRX384353 SRS508575 SAMN02422682
## SRR1039517  SRX384354 SRS508576 SAMN02422673
## SRR1039520  SRX384357 SRS508579 SAMN02422683
## SRR1039521  SRX384358 SRS508580 SAMN02422677
airway$dex
## [1] untrt trt   untrt trt   untrt trt   untrt trt  
## Levels: trt untrt

Exercise:

colData(airway[,airway$dex == 'untrt'])
## DataFrame with 4 rows and 9 columns
##            SampleName     cell      dex    albut        Run avgLength
##              <factor> <factor> <factor> <factor>   <factor> <integer>
## SRR1039508 GSM1275862   N61311    untrt    untrt SRR1039508       126
## SRR1039512 GSM1275866  N052611    untrt    untrt SRR1039512       126
## SRR1039516 GSM1275870  N080611    untrt    untrt SRR1039516       120
## SRR1039520 GSM1275874  N061011    untrt    untrt SRR1039520       101
##            Experiment    Sample    BioSample
##              <factor>  <factor>     <factor>
## SRR1039508  SRX384345 SRS508568 SAMN02422669
## SRR1039512  SRX384349 SRS508571 SAMN02422678
## SRR1039516  SRX384353 SRS508575 SAMN02422682
## SRR1039520  SRX384357 SRS508579 SAMN02422683
  • Exercise Remove all the rows from airway that had no gene expression across all samples. How many rows had no expression? Re-calculate the histogram of log-transoformed average expression with these rows excluded.
nullgenes <- rowSums(assay(airway)) == 0
airwaynonull <- airway[!nullgenes,,drop=FALSE]
table(nullgenes)
## nullgenes
## FALSE  TRUE 
## 33469 30633
genemeans <- rowMeans(log(assay(airwaynonull) + 1, base = exp(1)))
ggplot(data=data.frame(genemeans),
       aes(x=genemeans)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The list-like interface of GRangesList

Endomorphisms of list: subsetting them or applying functions will yield another list as output. Except for double squared brackets

a <- list(v=seq(1,5), names = c('manola','carmona'))
lengths(a)
##     v names 
##     5     2

another

seqinfo contains info of genome annotation from the genomic data * circular, sequence lengths, genome

r <- rowRanges(airway)
seqinfo(r)
## Seqinfo object with 722 sequences (1 circular) from an unspecified genome:
##   seqnames seqlengths isCircular genome
##   1         249250621      FALSE   <NA>
##   2         243199373      FALSE   <NA>
##   3         198022430      FALSE   <NA>
##   4         191154276      FALSE   <NA>
##   5         180915260      FALSE   <NA>
##   ...             ...        ...    ...
##   LRG_94        12428      FALSE   <NA>
##   LRG_96        93210      FALSE   <NA>
##   LRG_97        25996      FALSE   <NA>
##   LRG_98        18750      FALSE   <NA>
##   LRG_99        13294      FALSE   <NA>

Annotation data can sometimes be accessed from browseVignetteS:

browseVignettes("airway")
## starting httpd help server ... done
library(dplyr)
seqinfo(r) %>% as('GRanges') %>% subset(seqnames == 14) -> chr14
chr14
## GRanges object with 1 range and 0 metadata columns:
##      seqnames      ranges strand
##         <Rle>   <IRanges>  <Rle>
##   14       14 1-107349540      *
##   -------
##   seqinfo: 722 sequences (1 circular) from an unspecified genome

Over = overlaps

rowRanges(airway)[rowRanges(airway) %over% chr14]
## GRangesList object of length 2244:
## $ENSG00000006432 
## GRanges object with 26 ranges and 2 metadata columns:
##        seqnames            ranges strand |   exon_id       exon_name
##           <Rle>         <IRanges>  <Rle> | <integer>     <character>
##    [1]       14 71189243-71197581      - |    453195 ENSE00002597852
##    [2]       14 71196368-71197581      - |    453196 ENSE00002485643
##    [3]       14 71196383-71197581      - |    453197 ENSE00002510454
##    [4]       14 71196653-71197581      - |    453198 ENSE00002249042
##    [5]       14 71196852-71197581      - |    453199 ENSE00001518020
##    ...      ...               ...    ... .       ...             ...
##   [22]       14 71249940-71250162      - |    453216 ENSE00002524190
##   [23]       14 71267384-71267797      - |    453217 ENSE00001137145
##   [24]       14 71275483-71275888      - |    453218 ENSE00001518024
##   [25]       14 71275483-71276223      - |    453219 ENSE00002468641
##   [26]       14 71275483-71276251      - |    453220 ENSE00002511719
## 
## ...
## <2243 more elements>
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome
ridx <- rowRanges(airway) %over% chr14
airway[ridx,]
## class: RangedSummarizedExperiment 
## dim: 2244 8 
## metadata(1): ''
## assays(1): counts
## rownames(2244): ENSG00000006432 ENSG00000009830 ...
##   ENSG00000273259 ENSG00000273307
## rowData names(0):
## colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
## colData names(9): SampleName cell ... Sample BioSample

Provenance

#TODO

Session 4 - Working with biological annotations

Mapping between symbols

org packages

Look for packages with org in their name

  • org = organism-based
  • hs = homo sapiens
  • eg = entrez gene id are central symbols
BiocManager::available("^org\\.")
##  [1] "org.Ag.eg.db"      "org.At.tair.db"    "org.Bt.eg.db"     
##  [4] "org.Ce.eg.db"      "org.Cf.eg.db"      "org.Dm.eg.db"     
##  [7] "org.Dr.eg.db"      "org.EcK12.eg.db"   "org.EcSakai.eg.db"
## [10] "org.Gg.eg.db"      "org.Hs.eg.db"      "org.Mm.eg.db"     
## [13] "org.Mmu.eg.db"     "org.Pf.plasmo.db"  "org.Pt.eg.db"     
## [16] "org.Rn.eg.db"      "org.Sc.sgd.db"     "org.Ss.eg.db"     
## [19] "org.Xl.eg.db"
# BiocManager::install("org.Hs.eg.db")
library(org.Hs.eg.db)
library(AnnotationHub)
org <- org.Hs.eg.db

keytypes availabel for org?

keytypes(org)
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT" 
##  [5] "ENSEMBLTRANS" "ENTREZID"     "ENZYME"       "EVIDENCE"    
##  [9] "EVIDENCEALL"  "GENENAME"     "GO"           "GOALL"       
## [13] "IPI"          "MAP"          "OMIM"         "ONTOLOGY"    
## [17] "ONTOLOGYALL"  "PATH"         "PFAM"         "PMID"        
## [21] "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
## [25] "UNIGENE"      "UNIPROT"

Things that we can map to

columns(org)
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT" 
##  [5] "ENSEMBLTRANS" "ENTREZID"     "ENZYME"       "EVIDENCE"    
##  [9] "EVIDENCEALL"  "GENENAME"     "GO"           "GOALL"       
## [13] "IPI"          "MAP"          "OMIM"         "ONTOLOGY"    
## [17] "ONTOLOGYALL"  "PATH"         "PFAM"         "PMID"        
## [21] "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
## [25] "UNIGENE"      "UNIPROT"

Mappings can be done via: * 1 to 1 mapping * 1 to many

ensid <- sample(rownames(airwaynonull), 10)

Mappings via mapIds

mapIds(org, ensid, "SYMBOL", "ENSEMBL") %>% tibble::enframe("Ensemble","Symbol")
## 'select()' returned 1:1 mapping between keys and columns
## # A tibble: 10 x 2
##    Ensemble        Symbol   
##    <chr>           <chr>    
##  1 ENSG00000267086 <NA>     
##  2 ENSG00000184260 HIST2H2AC
##  3 ENSG00000189280 GJB5     
##  4 ENSG00000253121 <NA>     
##  5 ENSG00000237857 <NA>     
##  6 ENSG00000154451 GBP5     
##  7 ENSG00000264201 MIR4701  
##  8 ENSG00000259098 <NA>     
##  9 ENSG00000144369 FAM171B  
## 10 ENSG00000180658 OR2A4
mapIds(org, ensid, "GENENAME", "ENSEMBL") %>% tibble::enframe("GeneName","ENSEMBL")
## 'select()' returned 1:1 mapping between keys and columns
## # A tibble: 10 x 2
##    GeneName        ENSEMBL                                         
##    <chr>           <chr>                                           
##  1 ENSG00000267086 <NA>                                            
##  2 ENSG00000184260 histone cluster 2 H2A family member c           
##  3 ENSG00000189280 gap junction protein beta 5                     
##  4 ENSG00000253121 <NA>                                            
##  5 ENSG00000237857 <NA>                                            
##  6 ENSG00000154451 guanylate binding protein 5                     
##  7 ENSG00000264201 microRNA 4701                                   
##  8 ENSG00000259098 <NA>                                            
##  9 ENSG00000144369 family with sequence similarity 171 member B    
## 10 ENSG00000180658 olfactory receptor family 2 subfamily A member 4

Multiple mappings:

AnnotationDbi::select(org, ensid, c("ENTREZID", "SYMBOL"), "ENSEMBL")
## 'select()' returned 1:1 mapping between keys and columns
##            ENSEMBL  ENTREZID    SYMBOL
## 1  ENSG00000267086      <NA>      <NA>
## 2  ENSG00000184260      8338 HIST2H2AC
## 3  ENSG00000189280      2709      GJB5
## 4  ENSG00000253121      <NA>      <NA>
## 5  ENSG00000237857      <NA>      <NA>
## 6  ENSG00000154451    115362      GBP5
## 7  ENSG00000264201 100616262   MIR4701
## 8  ENSG00000259098      <NA>      <NA>
## 9  ENSG00000144369    165215   FAM171B
## 10 ENSG00000180658     79541     OR2A4
AnnotationDbi::select(org, ensid, c("GO"), "ENSEMBL")
## 'select()' returned 1:many mapping between keys and columns
##            ENSEMBL         GO EVIDENCE ONTOLOGY
## 1  ENSG00000267086       <NA>     <NA>     <NA>
## 2  ENSG00000184260 GO:0000786      IEA       CC
## 3  ENSG00000184260 GO:0003674       ND       MF
## 4  ENSG00000184260 GO:0005634      HDA       CC
## 5  ENSG00000184260 GO:0005634      IDA       CC
## 6  ENSG00000184260 GO:0008150       ND       BP
## 7  ENSG00000184260 GO:0046982      IEA       MF
## 8  ENSG00000184260 GO:0070062      HDA       CC
## 9  ENSG00000189280 GO:0005922      IEA       CC
## 10 ENSG00000189280 GO:0007154      IEA       BP
## 11 ENSG00000189280 GO:0008544      TAS       BP
## 12 ENSG00000189280 GO:0016021      IEA       CC
## 13 ENSG00000189280 GO:0060707      IEA       BP
## 14 ENSG00000189280 GO:0060708      IEA       BP
## 15 ENSG00000189280 GO:0060713      IEA       BP
## 16 ENSG00000189280 GO:1905867      IEA       BP
## 17 ENSG00000253121       <NA>     <NA>     <NA>
## 18 ENSG00000237857       <NA>     <NA>     <NA>
## 19 ENSG00000154451 GO:0000139      IEA       CC
## 20 ENSG00000154451 GO:0003924      IEA       MF
## 21 ENSG00000154451 GO:0005515      IPI       MF
## 22 ENSG00000154451 GO:0005525      IEA       MF
## 23 ENSG00000154451 GO:0005737      IDA       CC
## 24 ENSG00000154451 GO:0005794      IDA       CC
## 25 ENSG00000154451 GO:0006954      IEA       BP
## 26 ENSG00000154451 GO:0009617      IEA       BP
## 27 ENSG00000154451 GO:0016020      HDA       CC
## 28 ENSG00000154451 GO:0031410      IEA       CC
## 29 ENSG00000154451 GO:0034067      IDA       BP
## 30 ENSG00000154451 GO:0042802      IPI       MF
## 31 ENSG00000154451 GO:0042803      IDA       MF
## 32 ENSG00000154451 GO:0045089      ISS       BP
## 33 ENSG00000154451 GO:0048471      IDA       CC
## 34 ENSG00000154451 GO:0050702      IMP       BP
## 35 ENSG00000154451 GO:0051289      IDA       BP
## 36 ENSG00000154451 GO:0071346      IEP       BP
## 37 ENSG00000154451 GO:0072616      ISS       BP
## 38 ENSG00000154451 GO:1900017      ISS       BP
## 39 ENSG00000154451 GO:1900227      IDA       BP
## 40 ENSG00000264201       <NA>     <NA>     <NA>
## 41 ENSG00000259098       <NA>     <NA>     <NA>
## 42 ENSG00000144369 GO:0016021      IEA       CC
## 43 ENSG00000180658 GO:0003674       ND       MF
## 44 ENSG00000180658 GO:0004930      IEA       MF
## 45 ENSG00000180658 GO:0004984      IEA       MF
## 46 ENSG00000180658 GO:0005886      TAS       CC
## 47 ENSG00000180658 GO:0016021      IEA       CC
## 48 ENSG00000180658 GO:0032154      IDA       CC
## 49 ENSG00000180658 GO:0032467      IMP       BP
## 50 ENSG00000180658 GO:0032956      IMP       BP
## 51 ENSG00000180658 GO:0050911      IEA       BP
## 52 ENSG00000180658 GO:0055037      IDA       CC
## 53 ENSG00000180658 GO:0090543      IDA       CC
## 54 ENSG00000180658 GO:0097431      IDA       CC
## 55 ENSG00000180658 GO:1990023      IDA       CC
goid <- select(org, ensid, "GO", "ENSEMBL") %>% as_tibble()
## 'select()' returned 1:many mapping between keys and columns
head(goid)
## # A tibble: 6 x 4
##   ENSEMBL         GO         EVIDENCE ONTOLOGY
##   <chr>           <chr>      <chr>    <chr>   
## 1 ENSG00000267086 <NA>       <NA>     <NA>    
## 2 ENSG00000184260 GO:0000786 IEA      CC      
## 3 ENSG00000184260 GO:0003674 ND       MF      
## 4 ENSG00000184260 GO:0005634 HDA      CC      
## 5 ENSG00000184260 GO:0005634 IDA      CC      
## 6 ENSG00000184260 GO:0008150 ND       BP

Package with similar structure for GO terms

library(GO.db)
## 
columns(GO.db)
## [1] "DEFINITION" "GOID"       "ONTOLOGY"   "TERM"
keytypes(GO.db)
## [1] "DEFINITION" "GOID"       "ONTOLOGY"   "TERM"
gid <- unique(head(goid$GO))
gid
## [1] NA           "GO:0000786" "GO:0003674" "GO:0005634" "GO:0008150"
AnnotationDbi::select(GO.db, gid, "TERM", "GOID")
## 'select()' returned 1:1 mapping between keys and columns
##         GOID               TERM
## 1       <NA>               <NA>
## 2 GO:0000786         nucleosome
## 3 GO:0003674 molecular_function
## 4 GO:0005634            nucleus
## 5 GO:0008150 biological_process

AnnotationHub Cloud resource

Good alternative: annotation hub (web service) –> database

library(AnnotationHub)

beware if there are multiple versions of the same thing (check metadata columns)

hub <- AnnotationHub()
## snapshotDate(): 2019-05-02
query(hub, "^org\\.")
## AnnotationHub with 1710 records
## # snapshotDate(): 2019-05-02 
## # $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
## # $species: Escherichia coli, 'Chlorella vulgaris'_C-169, 'Klebsiella a...
## # $rdataclass: OrgDb
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH70563"]]' 
## 
##             title                                                     
##   AH70563 | org.Ag.eg.db.sqlite                                       
##   AH70564 | org.At.tair.db.sqlite                                     
##   AH70565 | org.Bt.eg.db.sqlite                                       
##   AH70566 | org.Cf.eg.db.sqlite                                       
##   AH70567 | org.Gg.eg.db.sqlite                                       
##   ...       ...                                                       
##   AH73812 | org.Plasmodium_vivax.eg.sqlite                            
##   AH73813 | org.Burkholderia_mallei_ATCC_23344.eg.sqlite              
##   AH73814 | org.Bacillus_cereus_(strain_ATCC_14579_|_DSM_31).eg.sqlite
##   AH73815 | org.Bacillus_cereus_ATCC_14579.eg.sqlite                  
##   AH73816 | org.Schizosaccharomyces_cryophilus_OY26.eg.sqlite
hub["AH70564"]
## AnnotationHub with 1 record
## # snapshotDate(): 2019-05-02 
## # names(): AH70564
## # $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
## # $species: Arabidopsis thaliana
## # $rdataclass: OrgDb
## # $rdatadateadded: 2019-04-29
## # $title: org.At.tair.db.sqlite
## # $description: NCBI gene ID based annotations about Arabidopsis thaliana
## # $taxonomyid: 3702
## # $genome: NCBI genomes
## # $sourcetype: NCBI/ensembl
## # $sourceurl: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/, ftp://ftp.ensembl....
## # $sourcesize: NA
## # $tags: c("NCBI", "Gene", "Annotation") 
## # retrieve record with 'object[["AH70564"]]'
atorg <- hub[["AH70564"]]
## downloading 0 resources
## loading from cache 
##     'AH70564 : 77310'
zoo <- hub["AH70564"]

Transcript annotations

TxDb packages

Look for available packages

BiocManager::available("^TxDb\\.Hsap") %>% tibble::enframe(name=NULL)
## # A tibble: 5 x 1
##   value                                      
##   <chr>                                      
## 1 TxDb.Hsapiens.BioMart.igis                 
## 2 TxDb.Hsapiens.UCSC.hg18.knownGene          
## 3 TxDb.Hsapiens.UCSC.hg19.knownGene          
## 4 TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts
## 5 TxDb.Hsapiens.UCSC.hg38.knownGene
query(hub, "^TxDb\\.Hsap")
## AnnotationHub with 6 records
## # snapshotDate(): 2019-05-02 
## # $dataprovider: UCSC
## # $species: Homo sapiens
## # $rdataclass: TxDb
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH52256"]]' 
## 
##             title                                             
##   AH52256 | TxDb.Hsapiens.BioMart.igis.sqlite                 
##   AH52257 | TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite          
##   AH52258 | TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite          
##   AH52259 | TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite
##   AH52260 | TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite          
##   AH70591 | TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
## Loading required package: GenomicFeatures
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
# Look for exons
ex <- exons(txdb)
ex
## GRanges object with 647025 ranges and 1 metadata column:
##                    seqnames        ranges strand |   exon_id
##                       <Rle>     <IRanges>  <Rle> | <integer>
##        [1]             chr1   11869-12227      + |         1
##        [2]             chr1   12010-12057      + |         2
##        [3]             chr1   12179-12227      + |         3
##        [4]             chr1   12613-12697      + |         4
##        [5]             chr1   12613-12721      + |         5
##        ...              ...           ...    ... .       ...
##   [647021] chrUn_GL000220v1 155997-156149      + |    647021
##   [647022] chrUn_KI270442v1 380608-380726      + |    647022
##   [647023] chrUn_KI270442v1 217250-217401      - |    647023
##   [647024] chrUn_KI270744v1   51009-51114      - |    647024
##   [647025] chrUn_KI270750v1 148668-148843      + |    647025
##   -------
##   seqinfo: 595 sequences (1 circular) from hg38 genome
transcripts(txdb)
## GRanges object with 226811 ranges and 2 metadata columns:
##                    seqnames        ranges strand |     tx_id
##                       <Rle>     <IRanges>  <Rle> | <integer>
##        [1]             chr1   11869-14409      + |         1
##        [2]             chr1   12010-13670      + |         2
##        [3]             chr1   29554-31097      + |         3
##        [4]             chr1   30267-31109      + |         4
##        [5]             chr1   30366-30503      + |         5
##        ...              ...           ...    ... .       ...
##   [226807] chrUn_GL000220v1 155997-156149      + |    226807
##   [226808] chrUn_KI270442v1 380608-380726      + |    226808
##   [226809] chrUn_KI270442v1 217250-217401      - |    226809
##   [226810] chrUn_KI270744v1   51009-51114      - |    226810
##   [226811] chrUn_KI270750v1 148668-148843      + |    226811
##                      tx_name
##                  <character>
##        [1] ENST00000456328.2
##        [2] ENST00000450305.2
##        [3] ENST00000473358.1
##        [4] ENST00000469289.1
##        [5] ENST00000607096.1
##        ...               ...
##   [226807] ENST00000619779.1
##   [226808] ENST00000620265.1
##   [226809] ENST00000611690.1
##   [226810] ENST00000616830.1
##   [226811] ENST00000612925.1
##   -------
##   seqinfo: 595 sequences (1 circular) from hg38 genome
exByTx = exonsBy(txdb, "tx")
exByTx
## GRangesList object of length 226811:
## $1 
## GRanges object with 3 ranges and 3 metadata columns:
##       seqnames      ranges strand |   exon_id   exon_name exon_rank
##          <Rle>   <IRanges>  <Rle> | <integer> <character> <integer>
##   [1]     chr1 11869-12227      + |         1        <NA>         1
##   [2]     chr1 12613-12721      + |         5        <NA>         2
##   [3]     chr1 13221-14409      + |         8        <NA>         3
## 
## $2 
## GRanges object with 6 ranges and 3 metadata columns:
##       seqnames      ranges strand | exon_id exon_name exon_rank
##   [1]     chr1 12010-12057      + |       2      <NA>         1
##   [2]     chr1 12179-12227      + |       3      <NA>         2
##   [3]     chr1 12613-12697      + |       4      <NA>         3
##   [4]     chr1 12975-13052      + |       6      <NA>         4
##   [5]     chr1 13221-13374      + |       7      <NA>         5
##   [6]     chr1 13453-13670      + |       9      <NA>         6
## 
## $3 
## GRanges object with 3 ranges and 3 metadata columns:
##       seqnames      ranges strand | exon_id exon_name exon_rank
##   [1]     chr1 29554-30039      + |      10      <NA>         1
##   [2]     chr1 30564-30667      + |      13      <NA>         2
##   [3]     chr1 30976-31097      + |      14      <NA>         3
## 
## ...
## <226808 more elements>
## -------
## seqinfo: 595 sequences (1 circular) from hg38 genome
which.max(lengths(exByTx))
## 31375 
## 31375

Exons from the gene:

exByTx[31375]
## GRangesList object of length 1:
## $31375 
## GRanges object with 363 ranges and 3 metadata columns:
##         seqnames              ranges strand |   exon_id   exon_name
##            <Rle>           <IRanges>  <Rle> | <integer> <character>
##     [1]     chr2 178807212-178807423      - |     92853        <NA>
##     [2]     chr2 178804552-178804655      - |     92850        <NA>
##     [3]     chr2 178802138-178802341      - |     92847        <NA>
##     [4]     chr2 178800395-178800682      - |     92846        <NA>
##     [5]     chr2 178799825-178799910      - |     92845        <NA>
##     ...      ...                 ...    ... .       ...         ...
##   [359]     chr2 178529960-178530116      - |     92476        <NA>
##   [360]     chr2 178528528-178529219      - |     92475        <NA>
##   [361]     chr2 178528274-178528427      - |     92474        <NA>
##   [362]     chr2 178527446-178527748      - |     92473        <NA>
##   [363]     chr2 178525989-178527307      - |     92471        <NA>
##         exon_rank
##         <integer>
##     [1]         1
##     [2]         2
##     [3]         3
##     [4]         4
##     [5]         5
##     ...       ...
##   [359]       359
##   [360]       360
##   [361]       361
##   [362]       362
##   [363]       363
## 
## -------
## seqinfo: 595 sequences (1 circular) from hg38 genome

TCGA

BiocManager::available("curate")
## [1] "curatedAdipoChIP"       "curatedAdipoRNA"       
## [3] "curatedBladderData"     "curatedBreastData"     
## [5] "curatedCRCData"         "curatedMetagenomicData"
## [7] "curatedOvarianData"     "curatedTCGAData"

Download data from TCGA

library(curatedTCGAData)
## Loading required package: MultiAssayExperiment
curatedTCGAData()
## Please see the list below for available cohorts and assays
## Available Cancer codes:
##  ACC BLCA BRCA CESC CHOL COAD DLBC ESCA GBM HNSC KICH
##  KIRC KIRP LAML LGG LIHC LUAD LUSC MESO OV PAAD PCPG
##  PRAD READ SARC SKCM STAD TGCT THCA THYM UCEC UCS UVM 
## Available Data Types:
##  CNACGH CNACGH_CGH_hg_244a
##  CNACGH_CGH_hg_415k_g4124a CNASeq CNASNP
##  CNVSNP GISTIC_AllByGene GISTIC_Peaks
##  GISTIC_ThresholdedByGene Methylation
##  Methylation_methyl27 Methylation_methyl450
##  miRNAArray miRNASeqGene mRNAArray
##  mRNAArray_huex mRNAArray_TX_g4502a
##  mRNAArray_TX_g4502a_1
##  mRNAArray_TX_ht_hg_u133a Mutation
##  RNASeq2GeneNorm RNASeqGene RPPAArray
curatedTCGAData("BRCA")
##                                         Title DispatchClass
## 31                       BRCA_CNASeq-20160128           Rda
## 32                       BRCA_CNASNP-20160128           Rda
## 33                       BRCA_CNVSNP-20160128           Rda
## 35             BRCA_GISTIC_AllByGene-20160128           Rda
## 36                 BRCA_GISTIC_Peaks-20160128           Rda
## 37     BRCA_GISTIC_ThresholdedByGene-20160128           Rda
## 39  BRCA_Methylation_methyl27-20160128_assays        H5File
## 40      BRCA_Methylation_methyl27-20160128_se           Rds
## 41 BRCA_Methylation_methyl450-20160128_assays        H5File
## 42     BRCA_Methylation_methyl450-20160128_se           Rds
## 43                 BRCA_miRNASeqGene-20160128           Rda
## 44                    BRCA_mRNAArray-20160128           Rda
## 45                     BRCA_Mutation-20160128           Rda
## 46              BRCA_RNASeq2GeneNorm-20160128           Rda
## 47                   BRCA_RNASeqGene-20160128           Rda
## 48                    BRCA_RPPAArray-20160128           Rda
mae <- curatedTCGAData("BRCA", c("GISTIC_AllByGene", "RNASeq2GeneNorm"), dry.run=FALSE)
## snapshotDate(): 2019-04-29
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## downloading 0 resources
## loading from cache 
##     'EH588 : 588'
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## downloading 0 resources
## loading from cache 
##     'EH596 : 596'
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## downloading 0 resources
## loading from cache 
##     'EH587 : 587'
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## downloading 0 resources
## loading from cache 
##     'EH590 : 590'
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## downloading 0 resources
## loading from cache 
##     'EH599 : 599'
## harmonizing input:
##   removing 12081 sampleMap rows not in names(experiments)
mae
## A MultiAssayExperiment object of 2 listed
##  experiments with user-defined names and respective classes. 
##  Containing an ExperimentList class object of length 2: 
##  [1] BRCA_GISTIC_AllByGene-20160128: SummarizedExperiment with 24776 rows and 1080 columns 
##  [2] BRCA_RNASeq2GeneNorm-20160128: SummarizedExperiment with 20501 rows and 1212 columns 
## Features: 
##  experiments() - obtain the ExperimentList instance 
##  colData() - the primary/phenotype DataFrame 
##  sampleMap() - the sample availability DataFrame 
##  `$`, `[`, `[[` - extract colData columns, subset, or experiment 
##  *Format() - convert into a long or wide DataFrame 
##  assays() - convert ExperimentList to a SimpleList of matrices
names(assays(mae))
## [1] "BRCA_GISTIC_AllByGene-20160128" "BRCA_RNASeq2GeneNorm-20160128"
mae[["BRCA_RNASeq2GeneNorm-20160128"]]
## class: SummarizedExperiment 
## dim: 20501 1212 
## metadata(0):
## assays(1): ''
## rownames(20501): A1BG A1CF ... psiTPTE22 tAKR
## rowData names(0):
## colnames(1212): TCGA-3C-AAAU-01A-11R-A41B-07
##   TCGA-3C-AALI-01A-11R-A41B-07 ... TCGA-Z7-A8R5-01A-42R-A41B-07
##   TCGA-Z7-A8R6-01A-11R-A41B-07
## colData names(0):

example: calculate distribution of library sizes

mae[["BRCA_RNASeq2GeneNorm-20160128"]] %>% 
  assay () %>%
  colSums() %>% 
  hist()

TODO

biomart, etc

Session info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] curatedTCGAData_1.6.0                  
##  [2] MultiAssayExperiment_1.10.4            
##  [3] TxDb.Hsapiens.UCSC.hg38.knownGene_3.4.6
##  [4] GenomicFeatures_1.36.3                 
##  [5] GO.db_3.8.2                            
##  [6] AnnotationHub_2.16.0                   
##  [7] BiocFileCache_1.8.0                    
##  [8] dbplyr_1.4.2                           
##  [9] org.Hs.eg.db_3.8.2                     
## [10] AnnotationDbi_1.46.0                   
## [11] airway_1.4.0                           
## [12] SummarizedExperiment_1.14.0            
## [13] DelayedArray_0.10.0                    
## [14] BiocParallel_1.18.0                    
## [15] matrixStats_0.54.0                     
## [16] plotly_4.9.0                           
## [17] Biobase_2.44.0                         
## [18] GenomicRanges_1.36.0                   
## [19] GenomeInfoDb_1.20.0                    
## [20] ggbeeswarm_0.6.0                       
## [21] forcats_0.4.0                          
## [22] stringr_1.4.0                          
## [23] dplyr_0.8.2                            
## [24] purrr_0.3.2                            
## [25] readr_1.3.1                            
## [26] tidyr_0.8.3                            
## [27] tibble_2.1.3                           
## [28] ggplot2_3.2.0                          
## [29] tidyverse_1.2.1                        
## [30] Biostrings_2.52.0                      
## [31] XVector_0.24.0                         
## [32] IRanges_2.18.1                         
## [33] S4Vectors_0.22.0                       
## [34] BiocGenerics_0.30.0                    
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.4-1              rstudioapi_0.10              
##  [3] hexbin_1.27.3                 bit64_0.9-7                  
##  [5] interactiveDisplayBase_1.22.0 fansi_0.4.0                  
##  [7] lubridate_1.7.4               xml2_1.2.0                   
##  [9] knitr_1.23                    zeallot_0.1.0                
## [11] jsonlite_1.6                  Rsamtools_2.0.0              
## [13] broom_0.5.2                   shiny_1.3.2                  
## [15] BiocManager_1.30.4            compiler_3.6.0               
## [17] httr_1.4.0                    backports_1.1.4              
## [19] assertthat_0.2.1              Matrix_1.2-17                
## [21] lazyeval_0.2.2                cli_1.1.0                    
## [23] later_0.8.0                   htmltools_0.3.6              
## [25] prettyunits_1.0.2             tools_3.6.0                  
## [27] gtable_0.3.0                  glue_1.3.1                   
## [29] GenomeInfoDbData_1.2.1        rappdirs_0.3.1               
## [31] Rcpp_1.0.1                    cellranger_1.1.0             
## [33] vctrs_0.1.0                   nlme_3.1-140                 
## [35] ExperimentHub_1.10.0          rtracklayer_1.44.0           
## [37] crosstalk_1.0.0               xfun_0.8                     
## [39] rvest_0.3.4                   mime_0.7                     
## [41] XML_3.98-1.20                 zlibbioc_1.30.0              
## [43] MASS_7.3-51.4                 scales_1.0.0                 
## [45] hms_0.4.2                     promises_1.0.1               
## [47] yaml_2.2.0                    curl_3.3                     
## [49] memoise_1.1.0                 biomaRt_2.40.1               
## [51] stringi_1.4.3                 RSQLite_2.1.1                
## [53] rlang_0.4.0                   pkgconfig_2.0.2              
## [55] bitops_1.0-6                  evaluate_0.14                
## [57] lattice_0.20-38               GenomicAlignments_1.20.1     
## [59] htmlwidgets_1.3               labeling_0.3                 
## [61] bit_1.1-14                    tidyselect_0.2.5             
## [63] magrittr_1.5                  R6_2.4.0                     
## [65] generics_0.0.2                DBI_1.0.0                    
## [67] pillar_1.4.2                  haven_2.1.0                  
## [69] withr_2.1.2                   RCurl_1.95-4.12              
## [71] modelr_0.1.4                  crayon_1.3.4                 
## [73] utf8_1.1.4                    rmarkdown_1.13               
## [75] progress_1.2.2                grid_3.6.0                   
## [77] readxl_1.3.1                  data.table_1.12.2            
## [79] blob_1.1.1                    digest_0.6.19                
## [81] xtable_1.8-4                  httpuv_1.5.1                 
## [83] munsell_0.5.0                 beeswarm_0.2.3               
## [85] viridisLite_0.3.0             vipor_0.4.5